## Replication code for Helgason (2015)
## "Fractional Integration Methods and Short Time Series"
## July 2015

# Set up working environment
rm( list=ls() )
library(mosaic)
library(arfima)
library(fracdiff)
library(tseries)
library(ggplot2)
library(plyr)
library(scales)

### Custom functions ###

# Function to calculate first difference of a series
d.<-function(x,first=NA) {
  ts(c(first,diff(x)))
}

# Function to calculate lag of a series
l.<-function(x,first=NA){
  c(first, head(x,-1))
}

# Function to calculate LRM of an ECM
lrm = function(ecm,alpha="l.(y)",beta="l.(x)") {
  # Function takes an ecm object and calculates estimate of LRM
  as.numeric(coef(ecm)[beta]/-coef(ecm)[alpha])
}

# Function to calculate SE of LRM
lrm.se = function(ecm,alpha="l.(y)",beta="l.(x)") {
  # Function takes an lm object and the names of two variables and calculates the SE of their ratio
  a=-coef(ecm)[alpha]
  b=coef(ecm)[beta]
  a.var=vcov(ecm)[alpha,alpha]
  b.var=vcov(ecm)[beta,beta]
  cov = vcov(ecm)[alpha,beta]
  # Formula below is from Banarjee et al. (1993:63)
  as.numeric(sqrt((1/a)^2*b.var+(b/a^2)^2*a.var+2*(1/a)*(b/a^2)*cov))
}

# Function to calculate coverage probabilities - from Carsey and Harden (2014:94)
coverage <- function(b,se,true,level=.95, df=Inf) { 
  #Estimate, standard error, true parameter, confidence interval, deg. freedom
  qtile <- level + (1-level)/2 #computes proper quintile
  lower.bound <- b - qt(qtile,df=df)*se
  upper.bound <- b + qt(qtile,df=df)*se
  # Is the true parameter in the CI? (yes=1)
  true.in.ci <- ifelse(true >= lower.bound & true <=upper.bound,1,0)
  cp <- mean(true.in.ci) # The coverage probability
  mc.lower.bound <- cp - 1.96*sqrt((cp*(1-cp))/length(b)) # Monte Carlo error
  mc.upper.bound <- cp + 1.96*sqrt((cp*(1-cp))/length(b))
  return(list(cp = cp,
              mc.eb = c(mc.lower.bound, mc.upper.bound)))  
}

# Custom function to select best ARFIMA model based on AIC
bestarfima <- function(ts,max.nar=1,int=c(0,1),max.nma=1,d0=c(0)) { 
  # ts is a univariate time series, 
  # max.nar is the max number of AR components to add, 
  # int indicates integration orders to add
  # max.nma is the max number of MA components to add, 
  # d0 specifies whether we want to test models which impose d equals 0 (1==yes, 0==no)
  nar = seq(0,max.nar)
  nma = seq(0,max.nma)
  
  # Determine combinations to test
  comb = expand.grid(nar,int,nma,d0)
  
  # Run models and save loglik in matrix
  models = list()
  aic = matrix(NA,nrow(comb),1)
  for (i in 1:nrow(comb)) {
    if (comb[i,4]==0) { # Is d to be estimated from the data?
      # Save loglik and find AIC - wrap in try to catch errors
      models[[length(models)+1]] = try(bestModes(arfima(ts,order=c(comb[i,1],comb[i,2],comb[i,3]),quiet=TRUE),1),silent=TRUE)
      aic[i,1] = try(AIC(models[[length(models)]])[1],silent=TRUE)
    } else {
      models[[length(models)+1]] = try(bestModes(arfima(ts,order=c(comb[i,1],comb[i,2],comb[i,3]),fixed=list(frac=0),quiet=TRUE),1),silent=TRUE)
      # The arfima function provides the incorrect BIC when d is fixed. Calculate AIC manually
      k = 2 + length(models[[length(models)]]$modes[[1]]$phi) + length(models[[length(models)]]$modes[[1]]$theta) # Number of actual parameters in model: Fitted mean + sigma + nar + nma
      aic[i,1] = try(2*k-2*models[[length(models)]]$modes[[1]]$loglik,silent=TRUE)     
    }
  } 
  
  # Find row with lowest AIC value and use that combination to save full model
  m=models[[which.min(aic[,1])]]
  
  # We want to save residuals. However, if series is deemed I(1), the first row will be removed (since the series is first diff)
  # So if chosen model is I(1), we prepend residuals with 1 NA
  residuals = as.numeric(unlist(residuals(m)$Mode1))
  if (length(residuals)!=length(ts)) { residuals=c(NA,residuals)}
  if (m[[6]][[1]][["dfrac"]]==0) {
    list = list("residuals"= residuals, # save (0,0,0) residuals
                "d"= 0,
                "d.se" = 0,
                "phi" = ifelse(length(m[[6]][[1]]$phi)==0,0,m[[6]][[1]]$phi), # Save ar component
                "theta" = ifelse(length(m[[6]][[1]]$theta)==0,0,m[[6]][[1]]$theta), # Save ma component
                "sigma"= m[[6]][[1]]$sigma2, # Save sigma estimate
                "mu" = m[[6]][[1]]$muHat, # Save fitted mean
                "dint" = m[[3]]) # Save whether integration was required to make stationary 
  } else {  
    list = list("residuals"= residuals, # save (0,0,0) residuals
                "d"= as.numeric(summary(m)[[5]][[1]]["d.f",1]), # save d estimate
                "d.se" = as.numeric(summary(m)[[5]][[1]]["d.f",2]), # save se of d estimate
                "phi" = ifelse(length(m[[6]][[1]]$phi)==0,0,m[[6]][[1]]$phi), # Save number of ar components used
                "theta" = ifelse(length(m[[6]][[1]]$theta)==0,0,m[[6]][[1]]$theta), # Save number of ma components used
                "sigma"= m[[6]][[1]]$sigma2, # Save sigma estimate
                "mu" = m[[6]][[1]]$muHat, # Save fitted mean  
                "dint" = m[[3]]) # Save whether integration was required to make stationary 
  }
  return(list)
}  

###################################################################################

# Specify simulation parameters
set.seed(12341)

# Fixed across simulations
sims= 500
theta.x = 0
theta.ey = 0

# Varying across simulations
N=c(50,100,250,1000)
dfrac.x = c(-0.2,0.4)
dfrac.ey = c(-0.2,0.2,0.0,0.4)
phi.x = c(0.0,0.5)
phi.ey = c(0.0,0.5)
beta.x = c(0.0,0.5) 
max.nar = c(0,1)
max.nma = c(0,1)

# All combinations of varying parameters
cond = expand.grid(N,dfrac.x,dfrac.ey,phi.x,phi.ey,beta.x,max.nar,max.nma)
colnames(cond) = c("N","dfrac.x","dfrac.ey","phi.x","phi.ey","beta.x","max.nar","max.nma")

# Don't want to use all combinations of parameters. Subset dataframe to reflect that

# Short run dynamics are either on or off
cond = subset(cond, phi.x == phi.ey & max.nar == max.nma & ((max.nar==0 & phi.x==0) | (max.nar > 0 & phi.x>0)))

# Select cointegration relationships to include
cond = subset(cond, dfrac.x==-0.2 & dfrac.ey==0.2 & beta.x>0 | 
                dfrac.x==-0.2 & dfrac.ey==-0.2 & beta.x==0 | 
                dfrac.x==0.4 & dfrac.ey == 0.0 & beta.x>0 |
                dfrac.x==0.4 & dfrac.ey == 0.4 & beta.x==0)

# Order data frame
cond = cond[with(cond, order(dfrac.x,beta.x, phi.x)), ]

# Set order of integration based on value of dfrac
cond$dint.x = ifelse(cond$dfrac.x>=0,0,1)
cond$dint.ey = ifelse(cond$dfrac.ey>=0,0,1)
cond$dint.y = ifelse(cond$beta.x>0,cond$dint.x,cond$dint.ey)

save(cond,file=paste(format(Sys.time(),'_%Y%m%d_%H%M'),"_cond.Rdata",sep=""))

# Create matrices that save results

# Results from ECMs - no fractional integration
ecmqi=10 # Number of Quantities of interest
ecmresults = matrix(NA,nrow=sims*nrow(cond),ncol=ecmqi)
ecmresults[,1] = rep(1:nrow(cond), each=sims)
colnames(ecmresults) = c("cond","a","a.se","b0","b0.se","b1","b1.se","LRM","LRM.se","type")
# Condition no.
# estimate of adjustment parameter,
# SE of adjustment parameter
# estimate of short term effect of beta.x,
# SE of short term effect of beta.x,
# estimate of lagged effect of beta.x,
# SE of lagged effect of beta.x
# LRM
# Se of LRM
# Type of model estimated

# Results for fractionally differenced ECMs
fdqi=26
fdresults = matrix(NA,nrow=sims*nrow(cond),ncol=fdqi)
fdresults[,1] = rep(1:nrow(cond), each=sims)
colnames(fdresults) = c("cond","fecm","fecm.se","b.dx","b.dx.se","b.lx","b.lx.se",
                        "d.x","d.y","d.fecm",
                        "phi.x","phi.y","phi.fecm",
                        "theta.x","theta.y","theta.fecm",
                        "dint.x","dint.y","dint.fecm",
                        "sigma.x", "sigma.y", "sigma.fecm",
                        "mu.x", "mu.y", "mu.fecm",
                        "type")
# Condition no.
# estimate of FECM 
# SE of FECM
# estimate of beta.x
# SE of beta.x
# estimate of short term effect of x,
# SE of short term effect of x,
# estimate of lagged effect of x,
# SE of lagged effect of x,
# Estimate of d.x, estimate of d.y, estimate of d.fecm
# Estimate of AR parameters in x,y, fecm
# Estimate of MA parameters in x, y, fecm
# Integration required for x, y, fecm
# Sigma estimate from ARFIMA models for x, y, fecm
# Sample mean estimate from ARFIMA models for x,y,fecm
# Type of model estimated

###############
ptm <- proc.time()
for(j in 1:nrow(cond)) {
  
  treat = cond[j,] 
  cat("\n", "\n", "Condition", j, "of", nrow(cond),"\n")
  prog <- txtProgressBar(min=0, max=sims, style=3)
  
  for(i in 1:sims){
    setTxtProgressBar(prog, value=i) 
    
    ###################################################################################################################
    ## SIMULATE DATA
    ###################################################################################################################
    
    # Simulate independent variable
    ex <- rnorm(treat$N)
    x <- arfima.sim(n = treat$N, model=list(phi = treat$phi.x,dfrac = treat$dfrac.x, dint = treat$dint.x), innov=ex)
    
    # Simulate dependent variable    
    ey <- arfima.sim(n = treat$N, model=list(phi = treat$phi.ey,dfrac = treat$dfrac.ey,dint = treat$dint.ey))  
    y = treat$beta.x*x  + ey
    
    ###################################################################################################################
    ## Estimate model without considering fractional dynamics
    ###################################################################################################################
    
    # Begin by establishing the order of integration of the variables and chose model based on that. 
    # This is based on true value of integration, but could be tested with unit root tests
    
    # Are both variables I(1)?
    if (treat$dint.x ==  1 & treat$dint.y==1) {
      # Is residual I(0)?
      if (treat$dint.ey==0) {
        ecmresults[i+((j-1)*sims),10] = 1 # Cointegration --> ECM ok.    
        ecm <- lm(d.(y) ~ l.(y) + d.(x) + l.(x) -1)
        ecmresults[i+((j-1)*sims),8] = lrm(ecm)
        ecmresults[i+((j-1)*sims),9] = lrm.se(ecm)
        
      } else {
        ecmresults[i+((j-1)*sims),10] = 2 # No Cointegration --> ECM not ok.      
        xdiff=d.(x)
        ydiff=d.(y)
        ecm <- lm(d.(ydiff) ~ l.(ydiff) + d.(xdiff) + l.(xdiff) -1)
        
        ecmresults[i+((j-1)*sims),8] = lrm(ecm,alpha="l.(ydiff)", beta="l.(xdiff)")
        ecmresults[i+((j-1)*sims),9] = lrm.se(ecm,alpha="l.(ydiff)", beta="l.(xdiff)")
      }
    } else {
      # Are both variables I(0)?
      if (treat$dint.x ==  0 & treat$dint.y==0) {
        ecmresults[i+((j-1)*sims),10] = 3 # Stationary --> ECM ok.    
        ecm <- lm(d.(y) ~ l.(y) + d.(x) + l.(x) -1)
        ecmresults[i+((j-1)*sims),8] = lrm(ecm)
        ecmresults[i+((j-1)*sims),9] = lrm.se(ecm)
        
      } else {
        # Either x or y is I(1)
        if (treat$dint.x==1 & treat$dint.y==0) {
          ecmresults[i+((j-1)*sims),10] = 4 #  X integrated, y not
          xdiff=d.(x)
          ecm <- lm(d.(y) ~ l.(y) + d.(xdiff) + l.(xdiff) -1)
          
          ecmresults[i+((j-1)*sims),8] = lrm(ecm,beta="l.(xdiff)")
          ecmresults[i+((j-1)*sims),9] = lrm.se(ecm,beta="l.(xdiff)")
          
        } else {  
          if (treat$dint.x==0 & treat$dint.y==1) {
            ecmresults[i+((j-1)*sims),10] = 5 #  Y integrated, X not
            ydiff=d.(y)
            ecm <- lm(d.(ydiff) ~ l.(ydiff) + d.(x) + l.(x) -1)
            
            ecmresults[i+((j-1)*sims),8] = lrm(ecm,alpha="l.(ydiff)")
            ecmresults[i+((j-1)*sims),9] = lrm.se(ecm,alpha="l.(ydiff)")
          }
        }
      }
    }
    
    # Save estimate and SE of parameters
    ecmresults[i+((j-1)*sims),2] = summary(ecm)$coef[1,1]
    ecmresults[i+((j-1)*sims),3] = summary(ecm)$coef[1,2]
    ecmresults[i+((j-1)*sims),4] = summary(ecm)$coef[2,1]
    ecmresults[i+((j-1)*sims),5] = summary(ecm)$coef[2,2]
    ecmresults[i+((j-1)*sims),6] = summary(ecm)$coef[3,1]
    ecmresults[i+((j-1)*sims),7] = summary(ecm)$coef[3,2]
    
        
    ###################################################################################################################
    ## Estimate model with fractional considerations
    ###################################################################################################################
    
    ###############################################################################################################
    ## FIND BEST ARFIMA MODELS FOR X, Y, and FECM
    ###############################################################################################################
    
    x.arfima = bestarfima(x,int=treat$dint.x,max.nar=treat$max.nar,max.nma=treat$max.nma)
    y.arfima = bestarfima(y,int=treat$dint.y,max.nar=treat$max.nar,max.nma=treat$max.nma)
    
    # Save output
    x.fd = x.arfima$residuals
    y.fd = y.arfima$residuals
    fdresults[i+((j-1)*sims),8] = x.arfima$d # Estimate of d for x
    fdresults[i+((j-1)*sims),9] = y.arfima$d # Estimate of d for y
    fdresults[i+((j-1)*sims),11] = x.arfima$phi # Estimate of AR components for x
    fdresults[i+((j-1)*sims),12] = y.arfima$phi # Estimate of AR components for y
    fdresults[i+((j-1)*sims),14] = x.arfima$theta # Estimate of AR components for x
    fdresults[i+((j-1)*sims),15] = y.arfima$theta # Estimate of AR components for y
    fdresults[i+((j-1)*sims),17] = x.arfima$dint # Int required for x?
    fdresults[i+((j-1)*sims),18] = y.arfima$dint # Int required for y?
    fdresults[i+((j-1)*sims),20] = x.arfima$sigma # Estimated sd for x
    fdresults[i+((j-1)*sims),21] = y.arfima$sigma # Estimated sd for y
    fdresults[i+((j-1)*sims),23] = x.arfima$mu # Estimated mean for x
    fdresults[i+((j-1)*sims),24] = y.arfima$mu # Estimated mean for y
    
    # ARFIMA model for residual from 1st stage model
    ecmres=lm(y~x)$resid
    fecm.arfima = bestarfima(ecmres,treat$dint.ey,max.nar=treat$max.nar,max.nma=treat$max.nma) 
    fecm.fd = fecm.arfima$residuals
    
    fdresults[i+((j-1)*sims),10] = fecm.arfima$d
    fdresults[i+((j-1)*sims),13] = fecm.arfima$phi
    fdresults[i+((j-1)*sims),16] = fecm.arfima$theta
    fdresults[i+((j-1)*sims),19] = fecm.arfima$dint
    fdresults[i+((j-1)*sims),22] = fecm.arfima$sigma
    fdresults[i+((j-1)*sims),25] = fecm.arfima$mu
    
    
    ###############################################################################################################
    ## COMPARE ESTIMATES OF FRACTIONAL INTEGRATION
    ###############################################################################################################    
    
    # Are the two series of the same integration order? 
    # 95% confints of d
    x.low = x.arfima$d-1.96*x.arfima$d.se
    x.hi = x.arfima$d+1.96*x.arfima$d.se
    y.low = y.arfima$d-1.96*y.arfima$d.se
    y.hi = y.arfima$d+1.96*y.arfima$d.se
    
    if (x.low <= y.hi && y.low <= x.hi && x.low!=0 && y.low!=0) { # Do they overlap?
      
      # Find conf int for d term in FECM
      fecm.low = fecm.arfima$d-1.96*fecm.arfima$d.se
      fecm.hi = fecm.arfima$d+1.96*fecm.arfima$d.se
      
      # Check whether d is lower for fecm. Note, if x and y are I(1), d is recorded
      # as a negative number. In such cases, d.fecm should be higher than d.x and d.y
      if ((fecm.hi < x.low & fecm.hi < y.low && x.low>0) ||  (fecm.hi < (x.low+1) & fecm.hi < (y.low+1) & x.arfima$d<0 & y.arfima$d<0 & fecm.arfima$d>0)) { 
        
        ###############################################################################################################
        ## FECM IS CORRECT SPECIFICATION
        ###############################################################################################################    
        
        # FECM is correct specification
        fdresults[i+((j-1)*sims),26] = 2 # "FECM - res I(d)" 
        ecm.fd<- lm(y.fd ~ l.(fecm.fd) + x.fd + l.(x.fd)-1)
        
      } else {
        # The series have same FI, but not fractionally cointegrated.
        
        ###############################################################################################################
        ## FECM IS NOT CORRECT SPECIFICATION
        ###############################################################################################################    
        
        fdresults[i+((j-1)*sims),26] = 3 # "Same FI - no FECM"
        ecm.fd<- lm(y.fd ~  l.(y.fd) + x.fd + l.(x.fd) - 1)              
      }
    } 
    
    else {
      ###############################################################################################################
      ## FECM IS NOT CORRECT SPECIFICATION
      ###############################################################################################################    
      
      # dfrac's do not overlap. The series are FI, but not fractionally cointegrated
      fdresults[i+((j-1)*sims),26] = 4 #  "Not same FI - no FECM"     
      ecm.fd<- lm(y.fd ~  l.(y.fd) + x.fd + l.(x.fd) - 1)
    }
    
    # Save estimates and SEs
    fdresults[i+((j-1)*sims),2] = summary(ecm.fd)$coef[1,1]
    fdresults[i+((j-1)*sims),3] = summary(ecm.fd)$coef[1,2]
    fdresults[i+((j-1)*sims),4] = summary(ecm.fd)$coef[2,1]
    fdresults[i+((j-1)*sims),5] = summary(ecm.fd)$coef[2,2]        
    fdresults[i+((j-1)*sims),6] = summary(ecm.fd)$coef[3,1]
    fdresults[i+((j-1)*sims),7] = summary(ecm.fd)$coef[3,2]
    
  }
  
  close(prog)
  save(ecmresults,file=paste(format(Sys.time(),'_%Y%m%d_%H%M%S'),"_ecm.Rdata",sep=""))
  save(fdresults,file=paste(format(Sys.time(),'_%Y%m%d_%H%M%S'),"_fd.Rdata",sep=""))
}

proc.time() - ptm

###################################################################################################################
## ANALYZE RESULTS
###################################################################################################################

fd=as.data.frame(fdresults)
fd$model = "Fractional Model"
ecm=as.data.frame(ecmresults)
ecm$model = "General Error Correction Model"

  ###################################################################################################################
  ## SHORT-RUN EFFECT
  ###################################################################################################################

    # Merge estimates from ECM and FD
    fd.short = subset(fd, select=c(model,cond,b.dx,b.dx.se), model== "Fractional Model")
    ecm.short = subset(ecm, select=c(model,cond,b0,b0.se), model!= "Fractional Model")
    colnames(fd.short)= c("model","cond","b0","b0.se")
    short = rbind(ecm.short,fd.short)
  
    # Merge above dataframe with info on conditions
    cond$cond = 1:nrow(cond)
    cond$dynamics = with(cond, ifelse(phi.ey>0,1,0))
    cond$dynamics = factor(cond$dynamics, levels=c("0","1"), labels=c("ARFIMA (0,d,0)","ARFIMA (1,d,0)"))
    cond$relation = with(cond, ifelse(beta.x==0&dfrac.x==0.4,"Null (d=0.4)",ifelse(beta.x==0&dfrac.x==-0.2, "Null (d=0.8)",
                               ifelse(beta.x>0&dfrac.x==0.4, "Cointegration (d=0.4)", "Cointegration (d=0.8)"))))
    cond$relation = factor(cond$relation, levels=c("Null (d=0.4)","Null (d=0.8)","Cointegration (d=0.4)","Cointegration (d=0.8)"), ordered=TRUE)
    data=merge(x=short,y=cond, by="cond")
  
  # Calculate evaluation metrics
  
  # Absolute bias
    data$abs.bias = with(data,abs(b0-(beta.x)))

  # Coverage of 95% confidence intervals
    cov95 <- ddply(data, .(model,N,dynamics,relation), summarize, cp=coverage(b0,b0.se,beta.x)$cp)  
    cov95$cp = sprintf("%.0f%%", 100*cov95$cp)
    cov95 = unique(cov95)
    
  
  # Plot graph
  shortplot=ggplot(data, aes(x =as.factor(N),y = abs.bias, fill=model)) + 
  geom_boxplot(position=position_dodge(width=0.8)) +
  facet_grid(relation ~ dynamics) +
  geom_text(data=cov95, aes(label=cp, y = -0.02,ymax=0.1), 
            size=5, position=position_dodge(width=0.8)) + expand_limits(y=-0.025) +
  theme_bw() + 
  scale_y_continuous(name=expression(paste("Absolute bias |",hat(beta)-beta,"|")))  +
  scale_x_discrete(name="Sample size") +
  scale_fill_grey(name="",start = 0.6, end = .9) +
  theme(legend.position="bottom", legend.background = element_rect(fill=alpha('grey', 0.2))) + 
  guides(fill=guide_legend(nrow=2,byrow=TRUE)) +
  theme(text = element_text(size=20))
  
  ggsave(
  "shortplot.pdf",
  shortplot,
  width = 12,
  height = 15,
  dpi = 600
  )
  

  ###################################################################################################################
  ## LONG-RUN EFFECT
  ###################################################################################################################

  # 1) Establish true effect for each of the conditions. 

  # Predict effect up to t=20
    x20.true = matrix(NA,nrow(cond),20)
    y20.true = matrix(NA,nrow(cond),20)
  
  for (i in 1:nrow(cond)) {
    # One unit change simulated in X. Note that to get a clean one unit shift, we must have extra zeros 
    # at previous timepoints before passing shock into arfima function
    ex <- c(rep(0, 500), 1, rep(0, 19))
    x20 <- arfima.sim(n = length(ex), model=list(phi = cond$phi.x[i], dfrac = cond$dfrac.x[i], dint = cond$dint.x[i]),innov = ex)
    y20 <- arfima.sim(n = length(x20),innov = cond$beta.x[i]*x20)
    y20.true[i,] = y20[501:520] 
  }
  
  # Cumulative sum of effects on y
    y20.true.cumsum =  t(apply(y20.true,1,cumsum))
  
  # Now we save true effect at t=5,10,20 - both y value and y cumulative change
    cond$y5 = y20.true[,5]
    cond$y10 = y20.true[,10]
    cond$y20 = y20.true[,20]
    cond$y5cumsum = y20.true.cumsum[,5]
    cond$y10cumsum = y20.true.cumsum[,10]
    cond$y20cumsum = y20.true.cumsum[,20]
  

  # 2) Predict from model estimates

  # 2a) GECM
    ecm.long = subset(ecm, select=c(model,cond,type,a,b0,b1))
  
  # Matrix to save prediction results
    y.hat.gecm = matrix(NA,nrow(ecm.long),20)
  
  for (j in 1:nrow(cond)) {
    # Simulate estimated X as either integrated or stationary
    d.x = c(0,1,rep(0,19))
    if (cond$dint.x[j]==1) {
      x= cumsum(d.x)
    } else {
      x=d.x
    }
  
    d = subset(ecm.long, cond==j)
    cat("\n", "\n", "Condition", j, "of", nrow(cond),"\n")
    prog <- txtProgressBar(min=0, max=nrow(d), style=3)
    
    # Now we predict changes in y for each simulation in condition j
    
    for (k in 1:nrow(d)) {
    setTxtProgressBar(prog, value=k) 
    
    # Type gives info on what kind of model was fit
    # For ECMs: 1=GECM, Integrated; 2=First differences; 3=GECM,Stationary; (No cases of 4=X first differences; 5=Y first differenced)
    
    if (d$type[k]==2) { # Fit model in first differences
        x.hat=d.x[-1]
    } else {
        x.hat=x[-1]
    } 
            
      x.l <- l.(x.hat,0) # Lagged value of x
      y.l <- 0 # Lagged value of y
      d.y.hat <- c() # Predicted change in y
        
      for(t in 1:length(x.hat)){ 
        d.y.hat[t] <- d$a[k]*y.l[t]+d$b0[k]*d.x[t]+d$b1[k]*x.l[t]
        y.l[t+1] <- d.y.hat[t] + y.l[t] 
      }
  
    if (d$type[k]==2) { # Model was fit in first differences
      y.l[1:20] = cumsum(y.l[1:20])
    }
    
      y.hat.gecm[((j-1)*nrow(d)+k),]= y.l[1:20]    
    }
  }
  
  # Cumulative sum of effects on y
    y.hat.gecm.cumsum =  t(apply(y.hat.gecm,1,cumsum))
  
  # Now we save estimated effect at t=5,10,20 - both y value and y cumulative change
    ecm.long$y5.hat = y.hat.gecm[,5]
    ecm.long$y10.hat = y.hat.gecm[,10]
    ecm.long$y20.hat = y.hat.gecm[,20]
    ecm.long$y5cumsum.hat = y.hat.gecm.cumsum[,5]
    ecm.long$y10cumsum.hat = y.hat.gecm.cumsum[,10]
    ecm.long$y20cumsum.hat = y.hat.gecm.cumsum[,20]
  

  # 2b) Predictions for FDs
  fd.long = subset(fd, select=c(model,cond,type,fecm,b.dx,b.lx,
                                d.x,phi.x,theta.x,dint.x,sigma.x,mu.x,
                                d.y,phi.y,theta.y,dint.y,sigma.y,mu.y,
                                d.fecm,phi.fecm,theta.fecm,dint.fecm,sigma.fecm,mu.fecm))
  
  # Matrix to save prediction results
    y.hat.fd = matrix(NA,nrow(fd.long),20)
  
  for (j in 1:nrow(fd.long)) {
    # Simulate estimated X change
    ex <- c(rep(0, 100), 1, rep(0, 19))
    if (fd.long$theta.x[j]==0) {
      x.hat <- arfima.sim(n = length(ex), 
                          model=list(phi = fd.long$phi.x[j], dfrac = fd.long$d.x[j], 
                                                     dint = fd.long$dint.x[j]),innov = ex)
    } else {
      x.hat <- arfima.sim(n = length(ex), 
                          model=list(phi = fd.long$phi.x[j], theta = fd.long$theta.x[j], 
                                                     dfrac = fd.long$d.x[j], dint = fd.long$dint.x[j]),
                                                     innov = ex)
    }
    
    # Now we predict changes in y for each simulation in condition
      y.hat <- arfima.sim(n = length(ex), innov = fd.long$b.dx[j]*x.hat)    
      
      y.hat.fd[j,]= y.hat[101:120]    
    }
  
  # Cumulative sum of effects on y
    y.hat.fd.cumsum =  t(apply(y.hat.fd,1,cumsum))
  
  # Now we save estimated effect at t=5,10,20 - both y value and y cumulative change
    fd.long$y5.hat = y.hat.fd[,5]
    fd.long$y10.hat = y.hat.fd[,10]
    fd.long$y20.hat = y.hat.fd[,20]
    fd.long$y5cumsum.hat = y.hat.fd.cumsum[,5]
    fd.long$y10cumsum.hat = y.hat.fd.cumsum[,10]
    fd.long$y20cumsum.hat = y.hat.fd.cumsum[,20]
  
  # Now we bind together predictions
    ecm.long.pred = subset(ecm.long, select=c("model","cond","type","y10.hat","y10cumsum.hat","y20.hat","y20cumsum.hat"))
    fd.long.pred = subset(fd.long, select=c("model","cond","type","y10.hat","y10cumsum.hat","y20.hat","y20cumsum.hat","d.x","d.y"))
    long = rbind.fill(ecm.long.pred,fd.long.pred)
  
  # Bind with condition info
    data=merge(x=long,y=cond, by="cond")
  
  # Calculate evaluation metrics
  data$pred.err.10 = with(data,(y10.hat-y10))
  data$pred.err.10cum=data$pred.err.10cum = with(data,(y10cumsum.hat-y10cumsum))
  
  # Prediction Error for d=0.4
  long4=ggplot(subset(data,cond>16), aes(x =as.factor(N),y = pred.err.10cum, fill=model)) + 
    geom_boxplot(position=position_dodge(width=0.8)) +
    facet_grid(relation ~ dynamics, scales="free_y") +
    theme_bw() + 
    scale_y_continuous(name=expression(paste("Prediction error at t=10")))  +
    scale_x_discrete(name="Sample size") +
    scale_fill_grey(name="",start = 0.6, end = .9) +
    theme(legend.position="bottom", legend.background = element_rect(fill=alpha('grey', 0.2))) + 
    guides(fill=guide_legend(nrow=2,byrow=TRUE)) +
    theme(text = element_text(size=20))
  
  ggsave(
    "long4.pdf",
    long4,
    width = 12,
    height = 10,
    dpi = 600
  )
  
  # Prediction Error for d = 0.8
  long8=ggplot(subset(data,cond<17), aes(x =as.factor(N),y = pred.err.10cum, fill=model)) + 
    geom_boxplot(position=position_dodge(width=0.8)) +
    facet_grid(relation ~ dynamics, scales="free") +
    theme_bw() + 
    scale_y_continuous(name=expression(paste("Prediction error at t=10")))  +
    scale_x_discrete(name="Sample size") +
    scale_fill_grey(name="",start = 0.6, end = .9) +
    theme(legend.position="bottom", legend.background = element_rect(fill=alpha('grey', 0.2))) + 
    guides(fill=guide_legend(nrow=2,byrow=TRUE)) +
    theme(text = element_text(size=20))
  
  ggsave(
    "long8.pdf",
    long8,
    width = 12,
    height = 10,
    dpi = 600
  )
  
